This is the code for the statistical analysis for “Vowel Acoustics as Predictors of Speech Intelligibility in Dysarthria.”
library(rio)
The following rio suggested packages are not installed: ‘csvy’, ‘feather’, ‘fst’, ‘hexView’, ‘readODS’, ‘rmatio’
Use 'install_formats()' to install them
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.5 ✓ purrr 0.3.4
✓ tibble 3.1.6 ✓ dplyr 1.0.7
✓ tidyr 1.1.3 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(irr) # install.packages('irr')
Loading required package: lpSolve
library(performance)
Attaching package: ‘performance’
The following object is masked from ‘package:irr’:
icc
library(car)
Loading required package: carData
Registered S3 methods overwritten by 'car':
method from
influence.merMod lme4
cooks.distance.influence.merMod lme4
dfbeta.influence.merMod lme4
dfbetas.influence.merMod lme4
Attaching package: ‘car’
The following object is masked from ‘package:dplyr’:
recode
The following object is masked from ‘package:purrr’:
some
library(ggpubr)
library("Hmisc") # install.packages('Hmisc')
Loading required package: lattice
Loading required package: survival
Loading required package: Formula
Attaching package: ‘Hmisc’
The following objects are masked from ‘package:dplyr’:
src, summarize
The following objects are masked from ‘package:base’:
format.pval, units
library(ggridges)
library(furniture) # install.packages('furniture')
library(gt)
Attaching package: ‘gt’
The following object is masked from ‘package:Hmisc’:
html
library(patchwork)
library(ks)
library(emuR) # install.packages('emuR')
Attaching package: ‘emuR’
The following object is masked from ‘package:Hmisc’:
label
The following object is masked from ‘package:car’:
ellipse
The following object is masked from ‘package:base’:
norm
library(geomtextpath) # remotes::install_github("AllanCameron/geomtextpath")
Reliability <- rio::import("Prepped Data/Reliability Data.csv")
AcousticData <- rio::import("Prepped Data/AcousticMeasures.csv") %>%
dplyr::mutate(intDiff = VAS - transAcc)
AcousticData <- AcousticData %>%
dplyr::filter(!grepl("_rel", Speaker)) %>%
dplyr::select(c(Speaker, Sex, Etiology, vowel_ED_b, VSA_b, Hull_b, Hull_bVSD_25, Hull_bVSD_50, Hull_bVSD_75, VAS, transAcc)) %>%
dplyr::mutate(Etiology = as.factor(Etiology),
Sex = as.factor(Sex),
Speaker = as.factor(Speaker))
Listeners <- rio::import("Prepped Data/Listener_Demographics.csv") %>%
dplyr::select(!c(StartDate:proloficID, Q2.4_6_TEXT, Q3.2_8_TEXT, AudioCheck:EP3))
Listeners$race[Listeners$Q3.3_7_TEXT == "Native American/ African amercing"] <- "Biracial or Multiracial"
Two raters (the first two authors) completed vowel segmentation for the speakers. To calculate inter-rater reliability, 20% of the speakers were segmented again by the other rater. Two-way intraclass coefficients were computed for the extracted F1 and F2 from the temporal midpoint of the vowel segments. Since only one set of ratings will be used in the data analysis, we focus on the single ICC results and interpretation. However, we also report the average ICC values to be comprehensive.
## Creating new data frames to calculate ICC for extracted F1 and F2 values
F1_Rel <- Reliability %>%
dplyr::select(c(F1, F1_rel))
F2_Rel <- Reliability %>%
dplyr::select(c(F2, F2_rel))
## Single ICC for F1
Single_F1 <- irr::icc(F1_Rel, model = "twoway", type = "agreement", unit = "single")
## Average ICC for F1
Average_F1 <- irr::icc(F1_Rel, model = "twoway", type = "agreement", unit = "average")
## Single ICC for F2
Single_F2 <- irr::icc(F2_Rel, model = "twoway", type = "agreement", unit = "single")
## Average ICC for F2
Average_F2 <- irr::icc(F2_Rel, model = "twoway", type = "agreement", unit = "average")
## Inter-rater reliability results and interpretation
print(paste("Single ICC for F1 is ", round(Single_F1$value, digits = 3), ". ",
"The 95% CI is [", round(Single_F1$lbound, digits = 3), " - ", round(Single_F1$ubound, digits = 3), "].", sep = ""))
[1] "Single ICC for F1 is 0.866. The 95% CI is [0.837 - 0.89]."
print(paste("Single ICC for F2 is ", round(Single_F2$value, digits = 3), ". ",
"The 95% CI is [", round(Single_F2$lbound, digits = 3), " - ", round(Single_F2$ubound, digits = 3), "].", sep = ""))
[1] "Single ICC for F2 is 0.931. The 95% CI is [0.916 - 0.944]."
print(paste("Average ICC for F1 is ", round(Average_F1$value, digits = 3), ". ",
"The 95% CI is [", round(Average_F1$lbound, digits = 3), " - ", round(Average_F1$ubound, digits = 3), "].", sep = ""))
[1] "Average ICC for F1 is 0.928. The 95% CI is [0.911 - 0.942]."
print(paste("Average ICC for F2 is ", round(Average_F2$value, digits = 3), ". ",
"The 95% CI is [", round(Average_F2$lbound, digits = 3), " - ", round(Average_F2$ubound, digits = 3), "].", sep = ""))
[1] "Average ICC for F2 is 0.964. The 95% CI is [0.956 - 0.971]."
print("Thus, interrater reliability for the extracted F1 and F2 values from the vowel segments was good to excellent.")
[1] "Thus, interrater reliability for the extracted F1 and F2 values from the vowel segments was good to excellent."
## Removing extra data frames from environment
rm(F1_Rel, F2_Rel, Reliability, Single_F1, Single_F2, Average_F1, Average_F2)
CorrMatrix <- AcousticData %>%
dplyr::select(VSA_b, vowel_ED_b, Hull_b, Hull_bVSD_25, Hull_bVSD_50, Hull_bVSD_75, VAS, transAcc) %>%
as.matrix() %>%
Hmisc::rcorr()
CorrMatrix <- CorrMatrix$r
stats::cor.test(AcousticData$VSA_b, AcousticData$vowel_ED_b, method = "pearson")
Pearson's product-moment correlation
data: AcousticData$VSA_b and AcousticData$vowel_ED_b
t = 6.5285, df = 38, p-value = 1.076e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5372669 0.8468014
sample estimates:
cor
0.7270881
stats::cor.test(AcousticData$Hull_b, AcousticData$Hull_bVSD_25, method = "pearson")
Pearson's product-moment correlation
data: AcousticData$Hull_b and AcousticData$Hull_bVSD_25
t = 9.4906, df = 38, p-value = 1.43e-11
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.7135136 0.9119080
sample estimates:
cor
0.838625
write.csv(CorrMatrix, file = "Tables/Correlation Matrix.csv")
rm(CorrMatrix)
Model 1
# Specifying Model 1
OT_Model1 <- lm(transAcc ~ Hull_bVSD_25, data = AcousticData)
## Model 1 Assumptions
performance::check_model(OT_Model1)
## Model 1 Summary
summary(OT_Model1)
Call:
lm(formula = transAcc ~ Hull_bVSD_25, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-47.896 -14.090 5.996 17.470 36.105
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 48.2973 9.7372 4.960 1.5e-05 ***
Hull_bVSD_25 0.6417 0.5663 1.133 0.264
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.83 on 38 degrees of freedom
Multiple R-squared: 0.03268, Adjusted R-squared: 0.007224
F-statistic: 1.284 on 1 and 38 DF, p-value: 0.2643
Model 2
## Specifying Model 2
OT_Model2 <- lm(transAcc ~ Hull_bVSD_25 + Hull_bVSD_75, data = AcousticData)
## Model 2 Assumption Check
performance::check_model(OT_Model2)
## Model 2 Summary
summary(OT_Model2)
Call:
lm(formula = transAcc ~ Hull_bVSD_25 + Hull_bVSD_75, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-48.854 -13.962 5.567 16.758 36.257
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 47.3374 10.3316 4.582 5.09e-05 ***
Hull_bVSD_25 0.8045 0.7781 1.034 0.308
Hull_bVSD_75 -0.7188 2.3225 -0.309 0.759
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.11 on 37 degrees of freedom
Multiple R-squared: 0.03518, Adjusted R-squared: -0.01698
F-statistic: 0.6745 on 2 and 37 DF, p-value: 0.5156
## Model 1 and Model 2 Comparison
anova(OT_Model1, OT_Model2)
Analysis of Variance Table
Model 1: transAcc ~ Hull_bVSD_25
Model 2: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75
Res.Df RSS Df Sum of Sq F Pr(>F)
1 38 21570
2 37 21514 1 55.696 0.0958 0.7587
Model 3
## Specifying Model 3
OT_Model3 <- lm(transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b, data = AcousticData)
## Model 3 Assumption Check
performance::check_model(OT_Model3)
## Model 3 Summary
summary(OT_Model3)
Call:
lm(formula = transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b,
data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-56.706 -13.157 7.018 17.957 29.990
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 31.0806 14.4916 2.145 0.0388 *
Hull_bVSD_25 -0.8439 1.2984 -0.650 0.5199
Hull_bVSD_75 0.3159 2.3714 0.133 0.8948
Hull_b 1.2941 0.8247 1.569 0.1253
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.65 on 36 degrees of freedom
Multiple R-squared: 0.09695, Adjusted R-squared: 0.0217
F-statistic: 1.288 on 3 and 36 DF, p-value: 0.2932
## Model 2 and Model 3 Comparison
anova(OT_Model2, OT_Model3)
Analysis of Variance Table
Model 1: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75
Model 2: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 37 21514
2 36 20137 1 1377.5 2.4626 0.1253
Model 4
## Specifying Model 4
OT_Model4 <- lm(transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b, data = AcousticData)
## Model 4 Assumption Check
performance::check_model(OT_Model4)
## Model 4 Summary
summary(OT_Model4)
Call:
lm(formula = transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b +
VSA_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-44.796 -11.144 3.042 12.567 34.297
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.4533 13.1156 2.093 0.04365 *
Hull_bVSD_25 -1.2602 1.1782 -1.070 0.29214
Hull_bVSD_75 0.5663 2.1390 0.265 0.79274
Hull_b 0.7364 0.7654 0.962 0.34261
VSA_b 6.0896 1.9954 3.052 0.00432 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 21.32 on 35 degrees of freedom
Multiple R-squared: 0.2868, Adjusted R-squared: 0.2052
F-statistic: 3.518 on 4 and 35 DF, p-value: 0.01626
## Model 3 and Model 4 Comparison
anova(OT_Model3, OT_Model4)
Analysis of Variance Table
Model 1: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b
Model 2: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 36 20137
2 35 15904 1 4232.4 9.314 0.004321 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Model 5
## Specifying Model 5
OT_Model5 <- lm(transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b + vowel_ED_b, data = AcousticData)
## Model 4 Assumption Check
performance::check_model(OT_Model5)
## Model 4 Summary
summary(OT_Model5)
Call:
lm(formula = transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b +
VSA_b + vowel_ED_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-46.533 -11.028 3.327 13.017 33.227
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 20.3598 21.5072 0.947 0.3505
Hull_bVSD_25 -1.2567 1.1924 -1.054 0.2994
Hull_bVSD_75 0.7007 2.1882 0.320 0.7508
Hull_b 0.6895 0.7826 0.881 0.3845
VSA_b 5.3903 2.6194 2.058 0.0473 *
vowel_ED_b 5.5182 13.1650 0.419 0.6777
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 21.57 on 34 degrees of freedom
Multiple R-squared: 0.2904, Adjusted R-squared: 0.1861
F-statistic: 2.783 on 5 and 34 DF, p-value: 0.03271
## Model 3 and Model 4 Comparison
anova(OT_Model4, OT_Model5)
Analysis of Variance Table
Model 1: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b
Model 2: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b + vowel_ED_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 35 15904
2 34 15823 1 81.764 0.1757 0.6777
## Specifying Final Model
OT_Model_final <- lm(transAcc ~ VSA_b, data = AcousticData)
## Final Model Assumption Check
performance::check_model(OT_Model_final)
## Final Model Summary
summary(OT_Model_final)
Call:
lm(formula = transAcc ~ VSA_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-46.72 -12.69 2.97 14.37 35.39
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 32.508 7.857 4.138 0.000187 ***
VSA_b 5.872 1.613 3.641 0.000807 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 20.86 on 38 degrees of freedom
Multiple R-squared: 0.2586, Adjusted R-squared: 0.2391
F-statistic: 13.25 on 1 and 38 DF, p-value: 0.0008068
confint(OT_Model_final)
2.5 % 97.5 %
(Intercept) 16.602765 48.413532
VSA_b 2.606927 9.137097
Model 1
# Specifying Model 1
VAS_Model1 <- lm(VAS ~ Hull_bVSD_25, data = AcousticData)
## Model 1 Assumptions
performance::check_model(VAS_Model1)
## Model 1 Summary
summary(VAS_Model1)
Call:
lm(formula = VAS ~ Hull_bVSD_25, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-47.625 -16.684 8.462 19.440 37.352
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.6328 10.7512 3.965 0.000313 ***
Hull_bVSD_25 0.5877 0.6253 0.940 0.353236
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 26.31 on 38 degrees of freedom
Multiple R-squared: 0.02272, Adjusted R-squared: -0.003001
F-statistic: 0.8833 on 1 and 38 DF, p-value: 0.3532
Model 2
## Specifying Model 2
VAS_Model2 <- lm(VAS ~ Hull_bVSD_25 + Hull_bVSD_75, data = AcousticData)
## Model 2 Assumption Check
performance::check_model(VAS_Model2)
## Model 2 Summary
summary(VAS_Model2)
Call:
lm(formula = VAS ~ Hull_bVSD_25 + Hull_bVSD_75, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-47.850 -16.576 8.382 19.448 37.237
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.3384 11.4211 3.707 0.000684 ***
Hull_bVSD_25 0.6376 0.8602 0.741 0.463195
Hull_bVSD_75 -0.2204 2.5674 -0.086 0.932036
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 26.66 on 37 degrees of freedom
Multiple R-squared: 0.02291, Adjusted R-squared: -0.0299
F-statistic: 0.4338 on 2 and 37 DF, p-value: 0.6513
## Model 1 and Model 2 Comparison
anova(VAS_Model1, VAS_Model2)
Analysis of Variance Table
Model 1: VAS ~ Hull_bVSD_25
Model 2: VAS ~ Hull_bVSD_25 + Hull_bVSD_75
Res.Df RSS Df Sum of Sq F Pr(>F)
1 38 26296
2 37 26291 1 5.239 0.0074 0.932
Model 3
## Specifying Model 3
VAS_Model3 <- lm(VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b, data = AcousticData)
## Model 3 Assumption Check
performance::check_model(VAS_Model3)
## Model 3 Summary
summary(VAS_Model3)
Call:
lm(formula = VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-56.444 -18.989 6.121 18.036 32.281
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25.0400 16.0599 1.559 0.128
Hull_bVSD_25 -1.1164 1.4389 -0.776 0.443
Hull_bVSD_75 0.8805 2.6280 0.335 0.740
Hull_b 1.3770 0.9139 1.507 0.141
Residual standard error: 26.21 on 36 degrees of freedom
Multiple R-squared: 0.08087, Adjusted R-squared: 0.00428
F-statistic: 1.056 on 3 and 36 DF, p-value: 0.3799
## Model 2 and Model 3 Comparison
anova(VAS_Model2, VAS_Model3)
Analysis of Variance Table
Model 1: VAS ~ Hull_bVSD_25 + Hull_bVSD_75
Model 2: VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 37 26291
2 36 24731 1 1559.6 2.2702 0.1406
Model 4
## Specifying Model 4
VAS_Model4 <- lm(VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b, data = AcousticData)
## Model 4 Assumption Check
performance::check_model(VAS_Model4)
## Model 4 Summary
summary(VAS_Model4)
Call:
lm(formula = VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b,
data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-42.25 -14.17 5.76 16.26 41.48
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 21.0802 14.5922 1.445 0.15746
Hull_bVSD_25 -1.5708 1.3109 -1.198 0.23885
Hull_bVSD_75 1.1539 2.3798 0.485 0.63078
Hull_b 0.7682 0.8516 0.902 0.37320
VSA_b 6.6479 2.2200 2.995 0.00502 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.72 on 35 degrees of freedom
Multiple R-squared: 0.2683, Adjusted R-squared: 0.1847
F-statistic: 3.209 on 4 and 35 DF, p-value: 0.02405
## Model 3 and Model 4 Comparison
anova(VAS_Model3, VAS_Model4)
Analysis of Variance Table
Model 1: VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b
Model 2: VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 36 24731
2 35 19687 1 5043.9 8.9671 0.00502 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Model 5
## Specifying Model 5
VAS_Model5 <- lm(VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b + vowel_ED_b, data = AcousticData)
## Model 5 Assumption Check
performance::check_model(VAS_Model5)
## Model 5 Summary
summary(VAS_Model5)
Call:
lm(formula = VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b +
vowel_ED_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-44.569 -13.697 5.187 16.183 40.204
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.6419 23.9197 0.529 0.6006
Hull_bVSD_25 -1.5667 1.3261 -1.181 0.2456
Hull_bVSD_75 1.3137 2.4337 0.540 0.5928
Hull_b 0.7124 0.8704 0.818 0.4188
VSA_b 5.8159 2.9132 1.996 0.0539 .
vowel_ED_b 6.5644 14.6417 0.448 0.6568
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.99 on 34 degrees of freedom
Multiple R-squared: 0.2726, Adjusted R-squared: 0.1657
F-statistic: 2.549 on 5 and 34 DF, p-value: 0.0461
## Model 4 and Model 5 Comparison
anova(VAS_Model4, VAS_Model5)
Analysis of Variance Table
Model 1: VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b
Model 2: VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b + VSA_b + vowel_ED_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 35 19687
2 34 19572 1 115.7 0.201 0.6568
## Specifying Final Model
VAS_Model_final <- lm(VAS ~ VSA_b, data = AcousticData)
## Final Model Assumption Check
performance::check_model(VAS_Model_final)
## Final Model Summary
summary(VAS_Model_final)
Call:
lm(formula = VAS ~ VSA_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-44.956 -15.943 6.754 17.153 43.062
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 24.703 8.761 2.820 0.00760 **
VSA_b 6.163 1.798 3.427 0.00148 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.26 on 38 degrees of freedom
Multiple R-squared: 0.2361, Adjusted R-squared: 0.216
F-statistic: 11.74 on 1 and 38 DF, p-value: 0.001482
confint(VAS_Model_final)
2.5 % 97.5 %
(Intercept) 6.966936 42.438084
VSA_b 2.521887 9.803467
Model 1
# Specify Model
OT_VAS_model <- lm(transAcc ~ VAS*Etiology + VAS*Sex, data = AcousticData)
# Assumption Check
performance::check_model(OT_VAS_model)
# Model Results
summary(OT_VAS_model)
Call:
lm(formula = transAcc ~ VAS * Etiology + VAS * Sex, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-14.910 -4.525 -1.280 5.529 16.932
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.8993562 5.7092981 1.909 0.0659 .
VAS 0.8924319 0.1276625 6.991 9.1e-08 ***
EtiologyAtaxic 2.9854239 9.5149402 0.314 0.7559
EtiologyHD 2.1009983 7.3948950 0.284 0.7783
EtiologyPD -2.4903757 10.1590855 -0.245 0.8080
SexM 6.8939642 7.0796700 0.974 0.3380
VAS:EtiologyAtaxic -0.0019088 0.1791048 -0.011 0.9916
VAS:EtiologyHD 0.0007804 0.1453318 0.005 0.9958
VAS:EtiologyPD 0.0320470 0.1732105 0.185 0.8545
VAS:SexM -0.1220661 0.1241230 -0.983 0.3333
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8.488 on 30 degrees of freedom
Multiple R-squared: 0.9031, Adjusted R-squared: 0.874
F-statistic: 31.06 on 9 and 30 DF, p-value: 8.181e-13
# Specify Final Model
OT_VAS_final <- lm(transAcc ~ VAS, data = AcousticData)
confint(OT_VAS_final)
2.5 % 97.5 %
(Intercept) 8.1038688 19.3870846
VAS 0.7637645 0.9580856
# Model Results
summary(OT_VAS_final)
Call:
lm(formula = transAcc ~ VAS, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-13.6882 -4.9316 -0.4408 4.9974 17.2110
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 13.74548 2.78681 4.932 1.64e-05 ***
VAS 0.86093 0.04799 17.938 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 7.873 on 38 degrees of freedom
Multiple R-squared: 0.8944, Adjusted R-squared: 0.8916
F-statistic: 321.8 on 1 and 38 DF, p-value: < 2.2e-16
summary(OT_VAS_final_quadratic)
Call:
lm(formula = transAcc ~ VAS, data = AcousticData %>% dplyr::mutate(VAS = VAS^2))
Residuals:
Min 1Q Median 3Q Max
-18.729 -7.579 1.411 6.587 22.341
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 28.274672 2.894431 9.769 6.52e-12 ***
VAS 0.008956 0.000702 12.759 2.61e-15 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.54 on 38 degrees of freedom
Multiple R-squared: 0.8107, Adjusted R-squared: 0.8058
F-statistic: 162.8 on 1 and 38 DF, p-value: 2.608e-15
##Descriptives Table
gtData <- AcousticData %>%
rbind(.,AcousticData %>%
dplyr::mutate(Etiology = "All Etiologies")) %>%
rbind(.,AcousticData %>%
rbind(.,AcousticData %>%
dplyr::mutate(Etiology = "All Etiologies")) %>%
dplyr::mutate(Sex = "All")) %>%
dplyr::mutate(Sex = as.factor(Sex),
Etiology = as.factor(Etiology)) %>%
dplyr::group_by(Sex, Etiology) %>%
dplyr::summarize(VSA_mean = mean(VSA_b, na.rm =T), VSA_sd = sd(VSA_b, na.rm = T),
Disp_mean = mean(vowel_ED_b, na.rm =T), Disp_sd = sd(vowel_ED_b, na.rm =T),
Hull_mean = mean(Hull_b, na.rm =T), Hull_sd = sd(Hull_b, na.rm =T),
VSD25_mean = mean(Hull_bVSD_25, na.rm =T), VSD25_sd = sd(Hull_bVSD_25, na.rm =T),
VSD50_mean = mean(Hull_bVSD_50, na.rm =T), VSD50_sd = sd(Hull_bVSD_50, na.rm =T),
VSD75_mean = mean(Hull_bVSD_75, na.rm =T), VSD75_sd = sd(Hull_bVSD_75, na.rm =T),
VAS_mean = mean(VAS, na.rm =T), VAS_sd = sd(VAS, na.rm =T),
OT_mean = mean(transAcc, na.rm =T), OT_sd = sd(transAcc, na.rm =T)) %>%
pivot_longer(cols = VSA_mean:OT_sd, names_to = "Measure",
values_to = "Value") %>%
dplyr::mutate(meanSD = ifelse(grepl("_mean",Measure),"M","sd"),
Measure = gsub("_mean","",Measure),
Measure = gsub("_sd","",Measure),
Etiology = paste(Etiology,meanSD, sep = "_"),
Sex = case_when(
Sex == "All" ~ "All Speakers",
Sex == "M" ~ "Male",
Sex == "F" ~ "Female"
)) %>%
dplyr::select(!meanSD) %>%
pivot_wider(names_from = Etiology, values_from = "Value") %>%
dplyr::filter(Measure != "VSD50")
`summarise()` has grouped output by 'Sex'. You can override using the `.groups` argument.
gtData %>%
gt::gt(
rowname_col = "Measure",
groupname_col = "Sex",
) %>%
fmt_number(
columns = 'All Etiologies_M':PD_sd,
decimals = 2
) %>%
tab_spanner(
label = "All Etiologies",
columns = c('All Etiologies_M', 'All Etiologies_sd')
) %>%
tab_spanner(
label = "ALS",
columns = c(ALS_M, ALS_sd)
) %>%
tab_spanner(
label = "PD",
columns = c(PD_M, PD_sd)
) %>%
tab_spanner(
label = "HD",
columns = c(HD_M, HD_sd)
) %>%
tab_spanner(
label = "Ataxic",
columns = c(Ataxic_M, Ataxic_sd)
) %>%
cols_label(
'All Etiologies_M' = "M",
'All Etiologies_sd' = "SD",
ALS_M = "M",
ALS_sd = "SD",
PD_M = "M",
PD_sd = "SD",
HD_M = "M",
HD_sd = "SD",
Ataxic_M = "M",
Ataxic_sd = "SD"
) %>%
gtsave("DescriptivesTable.html", path = "Tables")
sjPlot::tab_model(OT_Model1,OT_Model2,OT_Model3, OT_Model4, OT_Model5, OT_Model_final,
show.ci = F,
p.style = "stars",
file = "Tables/OT Models.html")
sjPlot::tab_model(VAS_Model1,VAS_Model2,VAS_Model3, VAS_Model4, VAS_Model5, VAS_Model_final,
show.ci = F,
p.style = "stars",
file = "Tables/VAS Models.html")
sjPlot::tab_model(OT_VAS_model,OT_VAS_final,
show.ci = F,
show.reflvl = TRUE,
p.style = "stars",
file = "Tables/OT and VAS Comparison.html")
formantColor <- "grey"
formantAlpha <- .95
lineColor <- "white"
lineAlpha <- .8
vowelData <- rio::import("Prepped Data/Vowel Data.csv") %>%
dplyr::filter(Speaker == "AF8")
Pitch_PRAAT <- list.files(path = paste("Prepped Data/Example Data/", sep = ""),
pattern = ".Pitch", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = F) %>%
dplyr::rename(Pitch = V1) %>%
dplyr::mutate(Pitch = gsub("--undefined--",NA,Pitch),
Pitch = as.numeric(Pitch))
Formants_PRAAT <- list.files(path = paste("Prepped Data/Example Data/", sep = ""),
pattern = "_Formant", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = T) %>%
dplyr::select(!c(nformants, B1.Hz., B2.Hz., B3.Hz., F4.Hz., B4.Hz., F5.Hz., B5.Hz.)) %>%
dplyr::rename(Time_s = time.s.,
F1_Hz = F1.Hz.,
F2_Hz = F2.Hz.,
F3_Hz = F3.Hz.) %>%
dplyr::mutate(F1_Hz = ifelse(F1_Hz == 0, NA, F1_Hz),
F2_Hz = ifelse(F2_Hz == 0, NA, F2_Hz),
F3_Hz = ifelse(F3_Hz == 0, NA, F3_Hz)) %>%
dplyr::mutate(F1_Hz = as.numeric(F1_Hz),
F2_Hz = as.numeric(F2_Hz),
F3_Hz = suppressWarnings(as.numeric(F3_Hz)),
Time_ms = Time_s / 1000,
F1_kHz = F1_Hz / 1000,
F2_kHz = F2_Hz / 1000,
F3_kHz = F3_Hz / 1000) %>%
dplyr::select(!Time_s) %>%
dplyr::relocate(Time_ms, .before = F1_Hz) %>%
cbind(.,Pitch_PRAAT)
c <- 2
while(c < NROW(Formants_PRAAT)){
Formants_PRAAT$F1_Hz[c] <- ifelse(is.na(Formants_PRAAT$F1_Hz[c-1]) &&
is.na(Formants_PRAAT$F1_Hz[c+1]),
NA,
Formants_PRAAT$F1_Hz[c])
Formants_PRAAT$F2_Hz[c] <- ifelse(is.na(Formants_PRAAT$F2_Hz[c-1]) &&
is.na(Formants_PRAAT$F2_Hz[c+1]),
NA,
Formants_PRAAT$F2_Hz[c])
c <- c + 1
}
rm(c)
Formants_PRAAT <- Formants_PRAAT %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T))) %>%
dplyr::filter(mDist_sd < 2) %>%
dplyr::select(!c(F1_mad, F2_mad, mDist, mDist_sd)) %>%
dplyr::mutate(F1_z = scale(F1_Hz, center = TRUE),
F2_z = scale(F2_Hz, center = TRUE),
F3_z = scale(F3_Hz, center = TRUE),
F1_b = emuR::bark(F1_Hz),
F2_b = emuR::bark(F2_Hz),
F3_b = emuR::bark(F3_Hz))
rm(Pitch_PRAAT)
## Corner Dispersion ----
wedge <- vowelData %>%
dplyr::group_by(Vowel) %>%
dplyr::summarize(mean_F1 = mean(F1_tempMid),
mean_F2 = mean(F2_tempMid),
mean_F1_z = mean(F1_z_tempMid),
mean_F2_z = mean(F2_z_tempMid),
mean_F1_b = mean(F1_b_tempMid),
mean_F2_b = mean(F2_b_tempMid)) %>%
dplyr::filter(Vowel == "v")
corner_dis <- vowelData %>%
dplyr::filter(Vowel != "v") %>%
dplyr::group_by(Vowel) %>%
dplyr::summarize(mean_F1 = mean(F1_tempMid),
mean_F2 = mean(F2_tempMid),
mean_F1_z = mean(F1_z_tempMid),
mean_F2_z = mean(F2_z_tempMid),
mean_F1_b = mean(F1_b_tempMid),
mean_F2_b = mean(F2_b_tempMid)) %>%
dplyr::mutate(Vowel_ED = sqrt((mean_F1-wedge$mean_F1)^2 + (mean_F2-wedge$mean_F2)^2),
Vowel_ED_z = sqrt((mean_F1_z-wedge$mean_F1_z)^2 + (mean_F2_z-wedge$mean_F2_z)^2),
Vowel_ED_b = sqrt((mean_F1_b-wedge$mean_F1_b)^2 + (mean_F2_b-wedge$mean_F2_b)^2))
# Plot Corner Dispersion
# Changing to IPA symbols
corner_dis <- corner_dis %>%
dplyr::mutate(Vowel = dplyr::case_when(
Vowel == "ae" ~ "æ",
TRUE ~ Vowel
))
wedge <- wedge %>%
dplyr::mutate(Vowel = case_when(
Vowel == "v" ~ "ʌ",
TRUE ~ Vowel
))
CDplot <- ggplot(aes(x=F2_b,
y=F1_b),
data = Formants_PRAAT,
inherit.aes = FALSE) +
geom_point(shape = 21,
alpha = formantAlpha,
color = formantColor) +
geom_line(aes(x = mean_F2_b,
y = mean_F1_b),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
dplyr::filter(Vowel == "i") %>%
rbind(.,wedge),
color = lineColor,
size = 1.5,
alpha = lineAlpha) +
geom_line(aes(x = mean_F2_b,
y = mean_F1_b),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
dplyr::filter(Vowel == "a") %>%
rbind(.,wedge),
color = lineColor,
size = 1.5,
alpha = lineAlpha) +
geom_line(aes(x = mean_F2_b,
y = mean_F1_b),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
dplyr::filter(Vowel == "æ") %>%
rbind(.,wedge),
color = lineColor,
size = 1.5,
alpha = lineAlpha) +
geom_line(aes(x = mean_F2_b,
y = mean_F1_b),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
dplyr::filter(Vowel == "u") %>%
rbind(.,wedge),
color = lineColor,
size = 1.5,
alpha = lineAlpha) +
geom_point(aes(x = mean_F2_b,
y = mean_F1_b,
color = Vowel),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
rbind(.,wedge),
inherit.aes = FALSE,
size = 5) +
scale_y_reverse() +
scale_x_reverse() +
theme_classic() + labs(title = paste("Corner Dispersion")) + xlab("F2 (bark)") + ylab("F1 (bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1) +
scale_color_manual(values = c("a" = "#1AAD77",
"æ" = "#1279B5",
"i" = "#FFBF00",
"u" = "#FD7853",
"ʌ" = "#BF3178"))
CDplot
rm(corner_dis, wedge)
## Vowel Space Area ----
VSA_coords <- vowelData %>%
dplyr::filter(Vowel != "v") %>%
dplyr::group_by(Vowel) %>%
dplyr::summarize(mean_F1 = mean(F1_tempMid),
mean_F2 = mean(F2_tempMid),
mean_F1_z = mean(F1_z_tempMid),
mean_F2_z = mean(F2_z_tempMid),
mean_F1_b = mean(F1_b_tempMid),
mean_F2_b = mean(F2_b_tempMid))
### Plotting VSA
VSA_coords <- VSA_coords %>%
dplyr::mutate(Vowel = case_when(
Vowel == "ae" ~ "æ",
TRUE ~ Vowel
))
VSAplot <- ggplot(aes(x = F2_b,
y = F1_b),
data = Formants_PRAAT,
inherit.aes = FALSE) +
geom_point(shape = 21,
alpha = formantAlpha,
color = formantColor) +
geom_polygon(aes(x = mean_F2_b,
y = mean_F1_b),
data = VSA_coords,
alpha = lineAlpha,
color = lineColor,
fill=NA,
size = 1.5) +
geom_point(aes(x = mean_F2_b,
y = mean_F1_b,
color = Vowel),
data = VSA_coords,
inherit.aes = FALSE,
size = 5) +
scale_y_reverse() +
scale_x_reverse() +
guides(color = FALSE) +
theme_classic() + labs(title = "VSA") + xlab("F2 (bark)") + ylab("F1 (bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1) +
scale_color_manual(values = c("a" = "#1AAD77",
"æ" = "#1279B5",
"i" = "#FFBF00",
"u" = "#FD7853"))
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
VSAplot
rm(VSA_coords)
## Hull ----
### Plotting Hull
convexCoords <- Formants_PRAAT %>%
dplyr::select(F1_b, F2_b) %>%
as.matrix() %>%
grDevices::chull()
convex <- Formants_PRAAT %>%
slice(convexCoords)
hullPlot <- ggplot(aes(F2_b, F1_b),
data = Formants_PRAAT) +
geom_point(shape = 21,
alpha = formantAlpha,
color = formantColor) +
geom_polygon(data = convex,
alpha = .5,
color = "#1279B5",
fill = NA,
size = 1.5) +
scale_y_reverse() +
scale_x_reverse() +
theme_classic() + labs(title = "VSA Hull") + xlab("F2 (bark)") + ylab("F1 (bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1)
hullPlot
## Vowel Space Density ----
## Bark Normalized Density ----
# selecting the bandwidth
H_hpi <- ks::Hpi(x = Formants_PRAAT[,c("F2_b","F1_b")], pilot = "samse", pre = "scale", binned = T)
# compute 2d kde
k <- kde(x = Formants_PRAAT[,c("F2_b","F1_b")],
H = H_hpi,
binned = T,
gridsize = 250)
#density <- k[["estimate"]]
# Before we can plot the density estimate we need to melt it into long format
mat.melted <- data.table::melt(k$estimate)
The melt generic in data.table has been passed a matrix and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace like reshape2::melt(k$estimate). In the next version, this warning will become an error.
names(mat.melted) <- c("x", "y", "density")
# We need to add two more colums to preserve the axes units
mat.melted$F2.b <- rep(k$eval.points[[1]], times = nrow(k$estimate))
mat.melted$F1.b <- rep(k$eval.points[[2]], each = nrow(k$estimate))
mat.melted$density <- scales::rescale(mat.melted$density, to = c(0, 1))
# VSD - 25
nVSD_25 <- mat.melted %>%
dplyr::filter(density > .25) %>%
dplyr::select(F2.b,F1.b, density) %>%
dplyr::rename(Density = density)
convexCoords <- nVSD_25 %>%
dplyr::select(F2.b, F1.b) %>%
as.matrix() %>%
#grDevices::xy.coords() %>%
grDevices::chull()
nconvex_25 <- nVSD_25 %>%
slice(convexCoords)
# VSD - 75
nVSD_75 <- mat.melted %>%
dplyr::filter(density > .75) %>%
dplyr::select(F2.b,F1.b, density) %>%
dplyr::rename(Density = density)
convexCoords <- nVSD_75 %>%
dplyr::select(F2.b, F1.b) %>%
as.matrix() %>%
grDevices::chull()
nconvex_75 <- nVSD_75 %>%
slice(convexCoords)
# Plotting Z Normalized VSD
rf <- colorRampPalette(rev(RColorBrewer::brewer.pal(11, "Spectral")))
r <- rf(32)
plotData <- mat.melted %>%
dplyr::rename(Density = density) %>%
dplyr::mutate(VSDlabel = dplyr::case_when(
Density < .25 ~ "none",
Density > .25 && Density < .75 ~ "VSD25",
TRUE ~ "VSD75"
))
geom.text.size <- 2
VSDplot <- ggplot(data = plotData,
aes(x = F2.b,
y = F1.b,
fill = Density)) +
geom_tile() +
scale_fill_viridis_c() +
#scale_fill_gradientn(colours = r) +
scale_x_reverse(expand = c(0, 0),
breaks = round(seq(min(mat.melted$F2.b),
max(mat.melted$F2.b)))) +
scale_y_reverse(expand = c(0, 0),
breaks = round(seq(min(mat.melted$F1.b),
max(mat.melted$F1.b)))) +
ylab("F1 (bark)") + xlab("F2 (bark)") +
labs(title = "VSD Hull") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1) +
geom_polygon(data = nconvex_25, alpha = lineAlpha, color = lineColor, size = 1.5, fill = NA, linetype = 1) +
geom_polygon(data = nconvex_75, alpha = lineAlpha, color = lineColor, size = 1.5, fill = NA, linetype = 2) +
geom_labelcurve(data = data.frame(x = 10.382452, xend = 8.313515, y = 2.874967, yend = 4.127919, Density = 1),
aes(x, y, xend = xend, yend = yend),
hjust = 0.5,
vjust = 0,
curvature = 0,
size = geom.text.size,
label = "VSD 25",
text_only = T,
color = "black",
fill = "white",
rich = TRUE) +
geom_labelcurve(data = data.frame(x = 14.92512, xend = 12.04660, y = 3.95, yend = 6, Density = 1),
aes(x, y, xend = xend, yend = yend),
hjust = 0.3,
vjust = 0,
curvature = 0,
size = geom.text.size,
fill = "white",
label = "VSD 75",
text_only = T,
color = "black")
VSDplot
# Combined Plot
measuresPlot <- VSAplot + CDplot + hullPlot + VSDplot +
patchwork::plot_layout(guides = 'collect',
ncol = 2)
ggsave(filename = "Plots/Measures.png",
plot = measuresPlot,
height = 7,
width = 8,
scale = .8)
formantAlpha <- .20
myPal <- c("#1279B5","#2D2D37")
Pitch_PRAAT <- list.files(path = paste("Prepped Data/Example Data/", sep = ""),
pattern = ".Pitch", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = F) %>%
dplyr::rename(Pitch = V1) %>%
dplyr::mutate(Pitch = gsub("--undefined--",NA,Pitch),
Pitch = as.numeric(Pitch))
Formants_PRAAT <- list.files(path = paste("Prepped Data/Example Data/", sep = ""),
pattern = "_Formant", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = T) %>%
dplyr::select(!c(nformants, B1.Hz., B2.Hz., B3.Hz., F4.Hz., B4.Hz., F5.Hz., B5.Hz.)) %>%
dplyr::rename(Time_s = time.s.,
F1_Hz = F1.Hz.,
F2_Hz = F2.Hz.,
F3_Hz = F3.Hz.) %>%
dplyr::mutate(F1_Hz = ifelse(F1_Hz == 0, NA, F1_Hz),
F2_Hz = ifelse(F2_Hz == 0, NA, F2_Hz),
F3_Hz = ifelse(F3_Hz == 0, NA, F3_Hz)) %>%
dplyr::mutate(F1_Hz = as.numeric(F1_Hz),
F2_Hz = as.numeric(F2_Hz),
F3_Hz = suppressWarnings(as.numeric(F3_Hz)),
Time_ms = Time_s / 1000,
F1_kHz = F1_Hz / 1000,
F2_kHz = F2_Hz / 1000,
F3_kHz = F3_Hz / 1000,
F1_b = emuR::bark(F1_Hz),
F2_b = emuR::bark(F2_Hz),
F3_b = emuR::bark(F3_Hz)) %>%
dplyr::select(!Time_s) %>%
dplyr::relocate(Time_ms, .before = F1_Hz) %>%
cbind(.,Pitch_PRAAT)
c <- 2
while(c < NROW(Formants_PRAAT)){
Formants_PRAAT$F1_Hz[c] <- ifelse(is.na(Formants_PRAAT$F1_Hz[c-1]) &&
is.na(Formants_PRAAT$F1_Hz[c+1]),
NA,
Formants_PRAAT$F1_Hz[c])
Formants_PRAAT$F2_Hz[c] <- ifelse(is.na(Formants_PRAAT$F2_Hz[c-1]) &&
is.na(Formants_PRAAT$F2_Hz[c+1]),
NA,
Formants_PRAAT$F2_Hz[c])
c <- c + 1
}
rm(c)
# Raw Formants ----
f1 <- ggplot(aes(x=F2_b,
y=F1_b),
data = Formants_PRAAT) +
geom_point(shape = 21, color = myPal[2]) +
scale_y_reverse() +
scale_x_reverse() +
scale_color_manual(values = myPal) +
theme_classic() + labs(title = paste("Raw Formant Values")) + xlab("F2 (bark)") + ylab("F1 (bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1,
legend.title = element_blank(),
legend.text = element_text(size=12))
# Step #1: Voiced Segments ----
plotData <- Formants_PRAAT %>%
dplyr::mutate(isOutlier = case_when(
is.na(Pitch) ~ "Removed",
TRUE ~ "Retained"
))
f2 <- ggplot(data = plotData,
aes(x = F2_b,
y = F1_b,
color = isOutlier)) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Removed")) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Retained")) +
scale_y_reverse() +
scale_x_reverse() +
scale_color_manual(values = myPal) +
theme_classic() + labs(title = paste("Step #1:\nVoiced Segments")) +
xlab("F2 (bark)") +
ylab("F1 (bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1,
legend.title = element_blank(),
legend.text = element_text(size=12))
# Step 2: MAD ----
plotData <- Formants_PRAAT %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5,
isOutlier = case_when(
F1_mad == TRUE | F2_mad == TRUE ~ "Removed",
TRUE ~ "Retained"
))
f3 <- ggplot(data = plotData,
aes(x = F2_b,
y = F1_b,
color = isOutlier)) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Removed")) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Retained")) +
scale_y_reverse() +
scale_x_reverse() +
scale_color_manual(values = myPal) +
theme_classic() +
labs(title = paste("Step #2:\nMedian Absolute Deviation")) +
xlab("F2 (bark)") +
ylab("F1 (bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1,
legend.title = element_blank(),
legend.text = element_text(size=12))
# Step 3: Mahalanhobis Distance ----
plotData <- Formants_PRAAT %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T)),
isOutlier = case_when(
mDist_sd < 2 ~ "Retained",
TRUE ~ "Removed"
))
f4 <- ggplot(data = plotData,
aes(x = F2_b,
y = F1_b,
color = isOutlier)) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Removed")) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Retained")) +
scale_y_reverse() +
scale_x_reverse() +
scale_color_manual(values = myPal) +
theme_classic() + labs(title = paste("Step #3:\nMahalanobis Distance")) + xlab("F2 (bark)") + ylab("F1 (bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1,
legend.title = element_blank(),
legend.text = element_text(size=12))
# Final Formants ----
plotData <- Formants_PRAAT %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T))) %>%
dplyr::filter(mDist_sd < 2)
f5 <- ggplot(aes(x=F2_b,
y=F1_b),
data = plotData,
inherit.aes = FALSE) +
geom_point(shape = 21, color = myPal[2]) +
scale_y_reverse() +
scale_x_reverse() +
theme_classic() + labs(title = paste("Final Formant Values")) + xlab("F2 (bark)") + ylab("F1 (bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1, legend.title = element_blank())
# Comibing plots
filteredPlot <- f1 + f2 + f3 + f4 + f5 + patchwork::guide_area() +
patchwork::plot_layout(guides = 'collect',
ncol = 3)
filteredPlot
ggsave(plot = filteredPlot, "Plots/Filtered Formants.png",
height = 6,
width = 8,
units = "in",
scale = .9)
plotData_Int <- AcousticData %>%
dplyr::filter(!grepl("_rel", Speaker)) %>%
dplyr::group_by(Speaker) %>%
dplyr::mutate(segMin = base::min(VAS, transAcc),
segMax = base::max(VAS, transAcc),
ratingAvg = mean(VAS, transAcc, na.rm = T),
Speaker = as.factor(Speaker),
Etiology = as.factor(Etiology)) %>%
arrange(segMax)
my_pal <- c("#f26430", "#272D2D","#256eff")
# With a bit more style
plot_Int <- ggplot(plotData_Int) +
geom_segment(aes(x = fct_inorder(Speaker),
xend = Speaker,
y = segMin,
yend = segMax,
color = Etiology)) +
geom_point(aes(x = Speaker,
y = VAS,
color = Etiology),
#color = my_pal[1],
size = 3,
shape = 19) +
geom_point(aes(x = Speaker,
y = transAcc,
color = Etiology),
#color = my_pal[2],
size = 3,
shape = 15) +
coord_flip()+
theme_classic() +
theme(
legend.position = "none",
panel.border = element_blank(),
) +
xlab("") +
ylab("Speech Intelligibility") +
ggtitle("Speech Intelligibility") +
ylim(c(0,100))
plot_Int
myPal <- c("#1AAD77", "#1279B5", "#FFBF00", "#FD7853", "#BF3178")
myShapes <- c(16, 18, 17, 15)
scatter1 <- ggplot(plotData_Int,
aes(x = VAS,
y = transAcc,
color = Etiology,
shape = Etiology,
linetype = Etiology)) +
geom_point() +
#geom_smooth(method = "lm", se = F) +
geom_abline(intercept = 0, slope = 1) +
coord_cartesian(xlim = c(0,100), ylim = c(0,100)) +
labs(x = "Intelligibility (VAS)", y = "Intelligibility (OT)") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myShapes) +
theme_classic() +
theme(aspect.ratio=1,
legend.position="right")
scatter2 <- ggplot(plotData_Int,
aes(x = VAS,
y = transAcc,
color = Etiology,
shape = Etiology,
linetype = Etiology)) +
#geom_point() +
geom_smooth(method = "lm", se = F) +
geom_abline(intercept = 0, slope = 1) +
coord_cartesian(xlim = c(0,100), ylim = c(0,100)) +
labs(x = "Intelligibility (VAS)", y = "Intelligibility (OT)") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myShapes) +
theme_classic() +
theme(aspect.ratio=1,
legend.position="right")
combinedScatter <- ggarrange(scatter1,
scatter2,
common.legend = F,
ncol = 2,
nrow = 1)
combinedScatter
ggsave(filename = "Plots/OT and VAS Scatterplot.png",
plot = combinedScatter,
height = 2,
width = 6,
units = "in",
scale = 1)
rm(scatter1, scatter2, combinedScatter)
Model Scatterplot
modelFigureData <- AcousticData %>%
dplyr::filter(!grepl("_rel",Speaker)) %>%
dplyr::select(Speaker, Etiology, Sex, VSA_b, vowel_ED_b, Hull_b, Hull_bVSD_25, Hull_bVSD_75, VAS, transAcc) %>%
dplyr::mutate(Speaker = as.factor(Speaker),
Etiology = as.factor(Etiology),
Sex = as.factor(Sex)) %>%
tidyr::pivot_longer(cols = VAS:transAcc, names_to = "IntType", values_to = "Int") %>%
dplyr::mutate(IntType = case_when(
IntType == "transAcc" ~ "OT",
TRUE ~ "VAS"
),
IntType = as.factor(IntType))
ylabel <- "Intelligibility"
myPal <- c("#2D2D37", "#1279B5")
myPalShape <- c(19, 1)
VSA <- modelFigureData %>%
ggplot() +
aes(x = VSA_b,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
xlab(expression("VSA (Bark"^2*")")) +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
aspect.ratio=1) +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
disp <- modelFigureData %>%
ggplot() +
aes(x = vowel_ED_b,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
xlab("Corner Dispersion (Bark)") +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(aspect.ratio=1) +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
Hull <- modelFigureData %>%
ggplot() +
aes(x = Hull_b,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
xlab(expression("VSA"[Hull]*" (Bark"^2*")")) +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(aspect.ratio=1) + theme(legend.position = "none") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
vsd25 <- modelFigureData %>%
ggplot() +
aes(x = Hull_bVSD_25,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
xlab(expression("VSD"[25]*" (Bark"^2*")")) +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(aspect.ratio=1) + theme(legend.position = "none") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
vsd75 <- modelFigureData %>%
ggplot() +
aes(x = Hull_bVSD_75,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
xlab(expression("VSD"[75]*" (Bark"^2*")")) +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(aspect.ratio=1) + theme(legend.position = "none") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
# Creating OT Scatterplot Figure
scatter <- VSA + disp + patchwork::guide_area() + Hull + vsd25 + vsd75 +
patchwork::plot_layout(guides = 'collect',
ncol = 3)
scatter
ggsave("Plots/ModelFigure.png", scatter,
height = 4,
width = 6,
units = "in",
scale = 1.1)
ListenerDemo <- Listeners %>%
furniture::table1(age, gender, race, ethnicity)
ListenerDemo
SpeakerDemo <- AcousticData %>%
dplyr::select(c(Speaker, Sex, Etiology))
Ages <- rio::import("Prepped Data/Speaker Ages.xlsx")
SpeakerDemo <- full_join(SpeakerDemo, Ages, by = "Speaker")
SpeakerDemoInfo <- SpeakerDemo %>%
furniture::table1(Sex, Etiology, Age, na.rm = F)
SpeakerDemoInfo
SpeakerDemo %>%
dplyr::summarize(mean_age = mean(Age, na.rm = T), age_sd = sd(Age, na.rm = T), age_range = range(Age, na.rm = T))